Fit Initial BG/NBD Models
1 Load Datasets
We start by re-loading the data required for these models.
1.1 Load Pre-processed Transactional Data
retail_transaction_data_tbl <- read_rds("data/retail_data_cleaned_tbl.rds")
retail_transaction_data_tbl %>% glimpse()## Rows: 1,021,424
## Columns: 23
## $ row_id <chr> "ROW0000001", "ROW0000002", "ROW0000003", "ROW000000…
## $ excel_sheet <chr> "Year 2009-2010", "Year 2009-2010", "Year 2009-2010"…
## $ invoice_id <chr> "489434", "489434", "489434", "489434", "489434", "4…
## $ stock_code <chr> "85048", "79323P", "79323W", "22041", "21232", "2206…
## $ description <chr> "15CM CHRISTMAS GLASS BALL 20 LIGHTS", "PINK CHERRY …
## $ quantity <dbl> 12, 12, 12, 48, 24, 24, 24, 10, 12, 12, 24, 12, 10, …
## $ invoice_date <date> 2009-12-01, 2009-12-01, 2009-12-01, 2009-12-01, 200…
## $ price <dbl> 6.95, 6.75, 6.75, 2.10, 1.25, 1.65, 1.25, 5.95, 2.55…
## $ customer_id <chr> "13085", "13085", "13085", "13085", "13085", "13085"…
## $ country <chr> "United Kingdom", "United Kingdom", "United Kingdom"…
## $ stock_code_upr <chr> "85048", "79323P", "79323W", "22041", "21232", "2206…
## $ cancellation <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ invoice_dttm <dttm> 2009-12-01 07:45:00, 2009-12-01 07:45:00, 2009-12-0…
## $ invoice_month <chr> "December", "December", "December", "December", "Dec…
## $ invoice_dow <chr> "Tuesday", "Tuesday", "Tuesday", "Tuesday", "Tuesday…
## $ invoice_dom <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01"…
## $ invoice_hour <chr> "07", "07", "07", "07", "07", "07", "07", "07", "07"…
## $ invoice_minute <chr> "45", "45", "45", "45", "45", "45", "45", "45", "45"…
## $ invoice_woy <chr> "49", "49", "49", "49", "49", "49", "49", "49", "49"…
## $ invoice_ym <chr> "200912", "200912", "200912", "200912", "200912", "2…
## $ stock_value <dbl> 83.40, 81.00, 81.00, 100.80, 30.00, 39.60, 30.00, 59…
## $ invoice_monthprop <dbl> 0.04347826, 0.04347826, 0.04347826, 0.04347826, 0.04…
## $ exclude <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
We also want to load the customer transactional data.
customer_transactions_tbl <- read_rds("data/customer_transactions_tbl.rds")
customer_transactions_tbl %>% glimpse()## Rows: 37,031
## Columns: 4
## $ tnx_timestamp <dttm> 2009-12-01 07:45:00, 2009-12-01 07:45:59, 2009-12-01 09…
## $ customer_id <chr> "13085", "13085", "13078", "15362", "18102", "12682", "1…
## $ invoice_id <chr> "489434", "489435", "489436", "489437", "489438", "48943…
## $ total_spend <dbl> 505.30, 145.80, 630.33, 310.75, 2286.24, 426.30, 50.40, …
Finally, we want to load the various datasets used to fit the P/NBD models
btyd_fitdata_tbl <- read_rds("data/btyd_fitdata_tbl.rds")
btyd_fitdata_tbl %>% glimpse()## Rows: 4,716
## Columns: 6
## $ customer_id <chr> "12346", "12347", "12348", "12349", "12350", "12351", "…
## $ first_tnx_date <dttm> 2009-12-14 08:34:00, 2010-10-31 14:19:59, 2010-09-27 1…
## $ last_tnx_date <dttm> 2011-01-18 10:00:59, 2011-01-26 14:29:59, 2011-01-25 1…
## $ x <dbl> 11, 2, 2, 2, 0, 0, 6, 0, 0, 3, 1, 2, 7, 4, 3, 1, 1, 0, …
## $ t_x <dbl> 57.151488095, 12.429563492, 17.117361111, 25.970535714,…
## $ T_cal <dbl> 67.520437, 21.628968, 26.482242, 48.063492, 8.190377, 1…
btyd_obs_stats_tbl <- read_rds("data/btyd_obs_stats_tbl.rds")
btyd_obs_stats_tbl %>% glimpse()## Rows: 3,849
## Columns: 4
## $ customer_id <chr> "12347", "12348", "12349", "12352", "12353", "12354", "123…
## $ tnx_count <int> 5, 2, 1, 3, 1, 1, 1, 2, 1, 2, 2, 3, 9, 2, 4, 1, 1, 2, 2, 1…
## $ first_tnx <dttm> 2011-04-07 10:43:00, 2011-04-05 10:47:00, 2011-11-21 09:5…
## $ last_tnx <dttm> 2011-12-07 15:51:59, 2011-09-25 13:13:00, 2011-11-21 09:5…
extreme_customers_tbl <- read_rds("data/extreme_customers_tbl.rds")
extreme_customers_tbl %>% glimpse()## Rows: 1,637
## Columns: 6
## $ customer_id <chr> "12350", "12351", "12353", "12355", "12357", "12365", "…
## $ first_tnx_date <dttm> 2011-02-02 16:00:59, 2010-11-29 15:23:00, 2010-10-27 1…
## $ last_tnx_date <dttm> 2011-02-02 16:00:59, 2010-11-29 15:23:00, 2010-10-27 1…
## $ x <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ t_x <dbl> 0.000000000, 0.000000000, 0.000000000, 0.000000000, 0.0…
## $ T_cal <dbl> 8.190377, 17.479861, 22.209921, 44.928671, 19.368552, 5…
1.2 Construct Customer Cohorts
Later in this worksheet we will want to create customer cohorts based on the date of the first transaction for each customer.
customer_cohortdata_tbl <- customer_transactions_tbl %>%
group_by(customer_id) %>%
summarise(
.groups = "drop",
first_tnx_date = min(tnx_timestamp)
) %>%
mutate(
cohort_qtr = first_tnx_date %>% as.yearqtr(),
cohort_ym = first_tnx_date %>% format("%Y %m"),
.after = "customer_id"
)
customer_cohortdata_tbl %>% glimpse()## Rows: 5,878
## Columns: 4
## $ customer_id <chr> "12346", "12347", "12348", "12349", "12350", "12351", "…
## $ cohort_qtr <yearqtr> 2009 Q4, 2010 Q4, 2010 Q3, 2010 Q2, 2011 Q1, 2010 Q…
## $ cohort_ym <chr> "2009 12", "2010 10", "2010 09", "2010 04", "2011 02", …
## $ first_tnx_date <dttm> 2009-12-14 08:34:00, 2010-10-31 14:19:59, 2010-09-27 1…
2 Fit Initial BG/NBD Model
stan_modeldir <- "stan_models"
stan_codedir <- "stan_code"## functions {
## #include util_functions.stan
## }
##
## data {
## int<lower=1> n; // number of customers
##
## vector<lower=0>[n] t_x; // time to most recent purchase
## vector<lower=0>[n] T_cal; // total observation time
## vector<lower=0>[n] x; // number of purchases observed
##
## real<lower=0> lambda_mn; // prior mean for lambda
## real<lower=0> lambda_cv; // prior cv for lambda
##
## real<lower=0,upper=1> p_mn; // prior mean for p
## real<lower=0> p_k; // prior strength for p
## }
##
##
## transformed data {
## real<lower=0> r = 1 / (lambda_cv * lambda_cv);
## real<lower=0> alpha = 1 / (lambda_cv * lambda_cv * lambda_mn);
##
## real<lower=0> a = p_k * p_mn;
## real<lower=0> b = p_k * (1 - p_mn);
## }
##
##
## parameters {
## vector<lower=0>[n] lambda; // purchase rate
## vector<lower=0,upper=1>[n] p; // dropout probabilit
## }
##
##
## model {
## // setting priors
## lambda ~ gamma(r, alpha);
## p ~ beta (a, b);
##
## target += calculate_bgnbd_loglik(n, lambda, p, x, t_x, T_cal);
## }
We now compile this model using CmdStanR.
bgnbd_fixed_stanmodel <- cmdstan_model(
"stan_code/bgnbd_fixed.stan",
include_paths = stan_codedir,
pedantic = TRUE,
dir = stan_modeldir
)2.1 Fit the Model
We then use this compiled model with our data to produce a fit of the data.
stan_modelname <- "bgnbd_fixed"
stanfit_prefix <- str_c("fit_", stan_modelname)
stan_data_lst <- btyd_fitdata_tbl %>%
select(customer_id, x, t_x, T_cal) %>%
compose_data(
lambda_mn = 0.25,
lambda_cv = 1.00,
p_mn = 0.10,
p_k = 10.00,
)
bgnbd_fixed_stanfit <- bgnbd_fixed_stanmodel$sample(
data = stan_data_lst,
chains = 4,
iter_warmup = 500,
iter_sampling = 500,
seed = 4201,
save_warmup = TRUE,
output_dir = stan_modeldir,
output_basename = stanfit_prefix,
)## Running MCMC with 4 chains, at most 8 in parallel...
##
## Chain 1 Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 2 Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 3 Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 4 Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 3 Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1 Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 4 Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 3 Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 4 Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 3 Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 4 Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 2 Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 3 Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 4 Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 3 Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 3 Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 4 Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 4 Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 3 Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1 Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 4 Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 3 Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 4 Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 3 Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 4 Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 3 Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 4 Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 2 Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 2 finished in 178.2 seconds.
## Chain 3 Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 3 finished in 180.8 seconds.
## Chain 1 Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1 finished in 181.5 seconds.
## Chain 4 Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 4 finished in 182.9 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 180.9 seconds.
## Total execution time: 183.6 seconds.
bgnbd_fixed_stanfit$summary()## # A tibble: 9,433 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 lp__ -8.32e+4 -8.32e+4 75.7 74.6 -8.33e+4 -8.30e+4 1.00 629.
## 2 lambda[1] 1.76e-1 1.70e-1 0.0535 0.0509 9.92e-2 2.71e-1 0.999 1891.
## 3 lambda[2] 1.33e-1 1.16e-1 0.0888 0.0725 2.95e-2 2.96e-1 1.00 1591.
## 4 lambda[3] 1.06e-1 9.30e-2 0.0663 0.0555 2.59e-2 2.33e-1 1.00 1891.
## 5 lambda[4] 6.95e-2 5.92e-2 0.0452 0.0369 1.68e-2 1.54e-1 0.999 1991.
## 6 lambda[5] 1.30e-1 7.27e-2 0.172 0.0817 4.97e-3 4.67e-1 1.00 1735.
## 7 lambda[6] 1.26e-1 5.83e-2 0.178 0.0666 4.35e-3 4.82e-1 1.00 1819.
## 8 lambda[7] 2.94e-1 2.81e-1 0.108 0.105 1.39e-1 4.85e-1 1.00 1759.
## 9 lambda[8] 1.30e-1 4.89e-2 0.206 0.0593 2.42e-3 5.39e-1 1.00 1587.
## 10 lambda[9] 1.60e-1 6.42e-2 0.225 0.0844 3.02e-3 6.22e-1 1.00 1134.
## # … with 9,423 more rows, and 1 more variable: ess_tail <dbl>
We have some basic HMC-based validity statistics we can check.
bgnbd_fixed_stanfit$cmdstan_diagnose()## Processing csv files: /home/rstudio/workshop/stan_models/fit_bgnbd_fixed-1.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_fixed-2.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_fixed-3.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_fixed-4.csvWarning: non-fatal error reading adaptation data
##
##
## Checking sampler transitions treedepth.
## Treedepth satisfactory for all transitions.
##
## Checking sampler transitions for divergences.
## No divergent transitions found.
##
## Checking E-BFMI - sampler transitions HMC potential energy.
## E-BFMI satisfactory.
##
## Effective sample size satisfactory.
##
## Split R-hat values satisfactory all parameters.
##
## Processing complete, no problems detected.
2.2 Visual Diagnostics of the Sample Validity
Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.
sample_params <- c(
"lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]", "lambda[5]", "lambda[6]",
"p[1]", "p[2]", "p[3]", "p[4]", "p[5]", "p[6]"
)
bgnbd_fixed_stanfit$draws(inc_warmup = FALSE) %>%
mcmc_trace(pars = sample_params) +
expand_limits(y = 0) +
labs(
x = "Iteration",
y = "Value",
title = "Traceplot of Sample of Lambda and p Values"
) +
theme(axis.text.x = element_text(size = 10))A common MCMC diagnostic is \(\hat{R}\) - which is a measure of the ‘similarity’ of the chains.
bgnbd_fixed_stanfit %>%
rhat(pars = c("lambda", "p")) %>%
mcmc_rhat() +
ggtitle("Plot of Parameter R-hat Values")Related to this quantity is the concept of effective sample size, \(N_{eff}\), an estimate of the size of the sample from a statistical information point of view.
bgnbd_fixed_stanfit %>%
neff_ratio(pars = c("lambda", "p")) %>%
mcmc_neff() +
ggtitle("Plot of Parameter Effective Sample Sizes")Finally, we also want to look at autocorrelation in the chains for each parameter.
bgnbd_fixed_stanfit$draws() %>%
mcmc_acf(pars = sample_params) +
ggtitle("Autocorrelation Plot of Sample Values")2.3 Validate the Hierarchical Lambda-Mean Model
We now want to validate the outputs of this model by looking at the posteriors
for both \(\lambda\), \(\mu\) and p_alive, and then running our simulations using
these outputs.
bgnbd_fixed_validation_tbl <- bgnbd_fixed_stanfit %>%
recover_types(btyd_fitdata_tbl) %>%
spread_draws(lambda[customer_id], p[customer_id]) %>%
ungroup() %>%
select(
customer_id, draw_id = .draw, post_lambda = lambda, post_p = p
)
bgnbd_fixed_validation_tbl %>% glimpse()## Rows: 9,432,000
## Columns: 4
## $ customer_id <chr> "12346", "12346", "12346", "12346", "12346", "12346", "123…
## $ draw_id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ post_lambda <dbl> 0.243379, 0.134271, 0.390812, 0.175006, 0.252511, 0.140594…
## $ post_p <dbl> 0.16930600, 0.01793380, 0.25364600, 0.02760890, 0.13716200…
Having constructed our simulations inputs, we now generate our simulations.
precompute_dir <- "precompute/bgnbd_fixed"
precomputed_tbl <- dir_ls(precompute_dir) %>%
enframe(name = NULL, value = "sim_file") %>%
mutate(sim_file = sim_file %>% as.character())
bgnbd_fixed_validsims_lookup_tbl <- bgnbd_fixed_validation_tbl %>%
group_nest(customer_id, .key = "cust_params") %>%
mutate(
sim_file = glue(
"{precompute_dir}/sims_bgnbd_fixed_{customer_id}.rds"
)
)
exec_tbl <- bgnbd_fixed_validsims_lookup_tbl %>%
anti_join(precomputed_tbl, by = "sim_file")
if(exec_tbl %>% nrow() > 0) {
exec_tbl %>%
mutate(
calc_file = future_map2_lgl(
sim_file, cust_params,
run_bgnbd_simulations_chunk,
start_dttm = as.POSIXct("2011-04-01"),
end_dttm = as.POSIXct("2011-12-10"),
.options = furrr_options(
globals = c(
"calculate_event_times", "rgamma_mucv", "gamma_mucv2shaperate",
"generate_bgnbd_validation_transactions"
),
packages = c("tidyverse", "fs"),
scheduling = FALSE,
seed = 4202
),
.progress = TRUE
)
)
}
exec_tbl %>% glimpse()## Rows: 0
## Columns: 3
## $ customer_id <chr>
## $ cust_params <list<tibble[,3]>> list()
## $ sim_file <glue>
bgnbd_fixed_validsims_lookup_tbl %>% glimpse()## Rows: 4,716
## Columns: 3
## $ customer_id <chr> "12346", "12347", "12348", "12349", "12350", "12351", "123…
## $ cust_params <list<tibble[,3]>> [<tbl_df[2000 x 3]>], [<tbl_df[2000 x 3]>], […
## $ sim_file <glue> "precompute/bgnbd_fixed/sims_bgnbd_fixed_12346.rds", "pre…
We now load all the simulations into a file.
bgnbd_fixed_validsims_tbl <- bgnbd_fixed_validsims_lookup_tbl %>%
mutate(
data = map(sim_file, ~ .x %>% read_rds() %>% select(draw_id, sim_tnx_count, sim_tnx_last))
) %>%
select(customer_id, sim_file, data) %>%
unnest(data)
bgnbd_fixed_validsims_tbl %>% glimpse()## Rows: 9,432,000
## Columns: 5
## $ customer_id <chr> "12346", "12346", "12346", "12346", "12346", "12346", "1…
## $ sim_file <glue> "precompute/bgnbd_fixed/sims_bgnbd_fixed_12346.rds", "p…
## $ draw_id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ sim_tnx_count <int> 4, 5, 0, 4, 5, 2, 7, 1, 9, 7, 0, 6, 4, 5, 8, 7, 3, 0, 7,…
## $ sim_tnx_last <dttm> 2011-05-30 18:44:35, 2011-10-14 23:48:52, NA, 2011-11-2…
We also want to look at how our simulated data compares to the transaction data that occurred after Apr 1 2011.
tnx_data_tbl <- btyd_obs_stats_tbl %>%
semi_join(bgnbd_fixed_validsims_tbl, by = "customer_id")
obs_customer_count <- tnx_data_tbl %>% nrow()
obs_total_tnx_count <- tnx_data_tbl %>% pull(tnx_count) %>% sum()
bgnbd_fixed_tnx_simsumm_tbl <- bgnbd_fixed_validsims_tbl %>%
group_by(draw_id) %>%
summarise(
.groups = "drop",
sim_customer_count = length(sim_tnx_count[sim_tnx_count > 0]),
sim_total_tnx_count = sum(sim_tnx_count)
)
ggplot(bgnbd_fixed_tnx_simsumm_tbl) +
geom_histogram(aes(x = sim_customer_count), binwidth = 10) +
geom_vline(aes(xintercept = obs_customer_count), colour = "red") +
labs(
x = "Simulated Customers With Transactions",
y = "Frequency",
title = "Histogram of Count of Customers Transacted",
subtitle = "Observed Count in Red"
)ggplot(bgnbd_fixed_tnx_simsumm_tbl) +
geom_histogram(aes(x = sim_total_tnx_count), binwidth = 50) +
geom_vline(aes(xintercept = obs_total_tnx_count), colour = "red") +
labs(
x = "Simulated Transaction Count",
y = "Frequency",
title = "Histogram of Count of Total Transaction Count",
subtitle = "Observed Count in Red"
)So we have gone from a strong under-estimation bias to a strong over-estimation bias, but the model is fitting much faster, so working on this.
3 Fit Hierarchical Mean BG/NBD Model
## functions {
## #include util_functions.stan
## }
##
## data {
## int<lower=1> n; // number of customers
##
## vector<lower=0>[n] t_x; // time to most recent purchase
## vector<lower=0>[n] T_cal; // total observation time
## vector<lower=0>[n] x; // number of purchases observed
##
## real lambda_mn_p1; // lambda mean prior p1
## real<lower=0> lambda_mn_p2; // lambda mean prior p2
##
## real<lower=0> lambda_cv;
##
## real<lower=0> p_mn_mu; // p mean prior p1
## real<lower=0> p_mn_k; // p mean prior p2
##
## real<lower=0> p_k;
## }
##
##
## transformed data {
## real<lower=0> r = 1 / (lambda_cv * lambda_cv);
##
## real<lower=0> p_mn_a = p_mn_k * p_mn_mu;
## real<lower=0> p_mn_b = p_mn_k * (1 - p_mn_mu);
## }
##
## parameters {
## real<lower=0> lambda_mn;
## real<lower=0,upper=1> p_mn;
##
## vector<lower=0>[n] lambda; // purchase rate
## vector<lower=0,upper=1>[n] p; // dropout probabilit
## }
##
##
## transformed parameters {
## real<lower=0> alpha = 1 / (lambda_cv * lambda_cv * lambda_mn);
##
## real<lower=0> p_a = p_k * p_mn;
## real<lower=0> p_b = p_k * (1 - p_mn);
## }
##
##
## model {
## // set hyper-priors
## lambda_mn ~ lognormal(lambda_mn_p1, lambda_mn_p2);
##
## p_mn ~ beta (p_mn_a, p_mn_b);
##
## // setting priors
## lambda ~ gamma(r, alpha);
## p ~ beta (p_a, p_b);
##
## target += calculate_bgnbd_loglik(n, lambda, p, x, t_x, T_cal);
## }
We now compile this model using CmdStanR.
bgnbd_hier_means_stanmodel <- cmdstan_model(
"stan_code/bgnbd_hier_means.stan",
include_paths = stan_codedir,
pedantic = TRUE,
dir = stan_modeldir
)3.1 Fit the Model
We then use this compiled model with our data to produce a fit of the data.
stan_modelname <- "bgnbd_hier_means"
stanfit_prefix <- str_c("fit_", stan_modelname)
stan_data_lst <- btyd_fitdata_tbl %>%
select(customer_id, x, t_x, T_cal) %>%
compose_data(
lambda_mn_p1 = log(1) - 0.5 * (1.0)^2,
lambda_mn_p2 = 1.0,
lambda_cv = 1.0,
p_mn_mu = 0.1,
p_mn_k = 10,
p_k = 10
)
bgnbd_hier_means_stanfit <- bgnbd_hier_means_stanmodel$sample(
data = stan_data_lst,
chains = 4,
iter_warmup = 500,
iter_sampling = 500,
seed = 4202,
save_warmup = TRUE,
output_dir = stan_modeldir,
output_basename = stanfit_prefix,
)## Running MCMC with 4 chains, at most 8 in parallel...
##
## Chain 1 Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 2 Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 3 Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 4 Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 4 Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1 Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 3 Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 4 Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 3 Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 4 Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 4 Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 3 Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 2 Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 4 Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 4 Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 3 Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 4 Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 3 Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 3 Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 4 Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1 Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 3 Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 4 Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 3 Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 4 Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 3 Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 4 Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 4 finished in 239.1 seconds.
## Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 3 Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 2 Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 2 finished in 246.2 seconds.
## Chain 1 Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1 finished in 253.0 seconds.
## Chain 3 Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 3 finished in 254.8 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 248.3 seconds.
## Total execution time: 255.5 seconds.
bgnbd_hier_means_stanfit$summary()## # A tibble: 9,438 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 lp__ -6.41e+4 -6.41e+4 1.03e+2 9.99e+1 -6.42e+4 -6.39e+4 1.02 131.
## 2 lambda_mn 1.21e-1 1.21e-1 2.57e-3 2.61e-3 1.17e-1 1.26e-1 1.01 567.
## 3 p_mn 1.70e-1 1.70e-1 5.55e-3 5.57e-3 1.61e-1 1.79e-1 1.04 97.7
## 4 lambda[1] 1.66e-1 1.61e-1 4.86e-2 4.75e-2 9.64e-2 2.52e-1 0.999 2807.
## 5 lambda[2] 1.16e-1 1.02e-1 7.01e-2 6.13e-2 3.01e-2 2.47e-1 1.00 1821.
## 6 lambda[3] 9.46e-2 8.52e-2 5.50e-2 5.15e-2 2.59e-2 2.02e-1 1.00 2220.
## 7 lambda[4] 6.82e-2 5.82e-2 4.49e-2 3.62e-2 1.66e-2 1.49e-1 1.00 3056.
## 8 lambda[5] 8.03e-2 5.16e-2 8.97e-2 5.53e-2 3.29e-3 2.53e-1 1.00 2252.
## 9 lambda[6] 7.37e-2 4.09e-2 9.43e-2 4.50e-2 2.86e-3 2.69e-1 1.00 2399.
## 10 lambda[7] 2.50e-1 2.37e-1 9.86e-2 9.21e-2 1.12e-1 4.36e-1 1.00 3406.
## # … with 9,428 more rows, and 1 more variable: ess_tail <dbl>
We have some basic HMC-based validity statistics we can check.
bgnbd_hier_means_stanfit$cmdstan_diagnose()## Processing csv files: /home/rstudio/workshop/stan_models/fit_bgnbd_hier_means-1.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_hier_means-2.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_hier_means-3.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_hier_means-4.csvWarning: non-fatal error reading adaptation data
##
##
## Checking sampler transitions treedepth.
## Treedepth satisfactory for all transitions.
##
## Checking sampler transitions for divergences.
## No divergent transitions found.
##
## Checking E-BFMI - sampler transitions HMC potential energy.
## E-BFMI satisfactory.
##
## Effective sample size satisfactory.
##
## Split R-hat values satisfactory all parameters.
##
## Processing complete, no problems detected.
3.2 Visual Diagnostics of the Sample Validity
Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.
sample_params <- c(
"lambda_mn", "p_mn",
"lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]", "lambda[5]",
"p[1]", "p[2]", "p[3]", "p[4]", "p[5]"
)
bgnbd_hier_means_stanfit$draws(inc_warmup = FALSE) %>%
mcmc_trace(pars = sample_params) +
expand_limits(y = 0) +
labs(
x = "Iteration",
y = "Value",
title = "Traceplot of Sample of Lambda and p Values"
) +
theme(axis.text.x = element_text(size = 10))A common MCMC diagnostic is \(\hat{R}\) - which is a measure of the ‘similarity’ of the chains.
bgnbd_hier_means_stanfit %>%
rhat(pars = c("lambda_mn", "p_mn", "lambda", "p")) %>%
mcmc_rhat() +
ggtitle("Plot of Parameter R-hat Values")Related to this quantity is the concept of effective sample size, \(N_{eff}\), an estimate of the size of the sample from a statistical information point of view.
bgnbd_hier_means_stanfit %>%
neff_ratio(pars = c("lambda_mn", "p_mn", "lambda", "p")) %>%
mcmc_neff() +
ggtitle("Plot of Parameter Effective Sample Sizes")3.3 Validate the Hierarchical Lambda-Mean Model
We now want to validate the outputs of this model by looking at the posteriors
for both \(\lambda\), \(\mu\) and p_alive, and then running our simulations using
these outputs.
bgnbd_hier_means_validation_tbl <- bgnbd_hier_means_stanfit %>%
recover_types(btyd_fitdata_tbl) %>%
spread_draws(lambda[customer_id], p[customer_id]) %>%
ungroup() %>%
select(
customer_id, draw_id = .draw, post_lambda = lambda, post_p = p
)
bgnbd_hier_means_validation_tbl %>% glimpse()## Rows: 9,432,000
## Columns: 4
## $ customer_id <chr> "12346", "12346", "12346", "12346", "12346", "12346", "123…
## $ draw_id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ post_lambda <dbl> 0.0953844, 0.0853009, 0.0811996, 0.2378080, 0.1220780, 0.2…
## $ post_p <dbl> 0.0948068, 0.0799318, 0.1330810, 0.1203820, 0.0702531, 0.0…
Having constructed our simulations inputs, we now generate our simulations.
precompute_dir <- "precompute/bgnbd_hier_means"
precomputed_tbl <- dir_ls(precompute_dir) %>%
enframe(name = NULL, value = "sim_file") %>%
mutate(sim_file = sim_file %>% as.character())
bgnbd_hier_means_validsims_lookup_tbl <- bgnbd_hier_means_validation_tbl %>%
group_nest(customer_id, .key = "cust_params") %>%
mutate(
sim_file = glue(
"{precompute_dir}/sims_bgnbd_hier_means_{customer_id}.rds"
)
)
exec_tbl <- bgnbd_hier_means_validsims_lookup_tbl %>%
anti_join(precomputed_tbl, by = "sim_file")
if(exec_tbl %>% nrow() > 0) {
exec_tbl %>%
mutate(
calc_file = future_map2_lgl(
sim_file, cust_params,
run_bgnbd_simulations_chunk,
start_dttm = as.POSIXct("2011-04-01"),
end_dttm = as.POSIXct("2011-12-10"),
.options = furrr_options(
globals = c(
"calculate_event_times", "rgamma_mucv", "gamma_mucv2shaperate",
"generate_bgnbd_validation_transactions"
),
packages = c("tidyverse", "fs"),
scheduling = FALSE,
seed = 4202
),
.progress = TRUE
)
)
}## # A tibble: 4,716 × 4
## customer_id cust_params sim_file calc_file
## <chr> <list<tibble[,3]>> <glue> <lgl>
## 1 12346 [2,000 × 3] precompute/bgnbd_hier_means/sims_bg… TRUE
## 2 12347 [2,000 × 3] precompute/bgnbd_hier_means/sims_bg… TRUE
## 3 12348 [2,000 × 3] precompute/bgnbd_hier_means/sims_bg… TRUE
## 4 12349 [2,000 × 3] precompute/bgnbd_hier_means/sims_bg… TRUE
## 5 12350 [2,000 × 3] precompute/bgnbd_hier_means/sims_bg… TRUE
## 6 12351 [2,000 × 3] precompute/bgnbd_hier_means/sims_bg… TRUE
## 7 12352 [2,000 × 3] precompute/bgnbd_hier_means/sims_bg… TRUE
## 8 12353 [2,000 × 3] precompute/bgnbd_hier_means/sims_bg… TRUE
## 9 12355 [2,000 × 3] precompute/bgnbd_hier_means/sims_bg… TRUE
## 10 12356 [2,000 × 3] precompute/bgnbd_hier_means/sims_bg… TRUE
## # … with 4,706 more rows
exec_tbl %>% glimpse()## Rows: 4,716
## Columns: 3
## $ customer_id <chr> "12346", "12347", "12348", "12349", "12350", "12351", "123…
## $ cust_params <list<tibble[,3]>> [<tbl_df[2000 x 3]>], [<tbl_df[2000 x 3]>], […
## $ sim_file <glue> "precompute/bgnbd_hier_means/sims_bgnbd_hier_means_12346.…
bgnbd_hier_means_validsims_lookup_tbl %>% glimpse()## Rows: 4,716
## Columns: 3
## $ customer_id <chr> "12346", "12347", "12348", "12349", "12350", "12351", "123…
## $ cust_params <list<tibble[,3]>> [<tbl_df[2000 x 3]>], [<tbl_df[2000 x 3]>], […
## $ sim_file <glue> "precompute/bgnbd_hier_means/sims_bgnbd_hier_means_12346.…
We now load all the simulations into a file.
bgnbd_hier_means_validsims_tbl <- bgnbd_hier_means_validsims_lookup_tbl %>%
mutate(
data = map(sim_file, ~ .x %>% read_rds() %>% select(draw_id, sim_tnx_count, sim_tnx_last))
) %>%
select(customer_id, sim_file, data) %>%
unnest(data)
bgnbd_hier_means_validsims_tbl %>% glimpse()## Rows: 9,432,000
## Columns: 5
## $ customer_id <chr> "12346", "12346", "12346", "12346", "12346", "12346", "1…
## $ sim_file <glue> "precompute/bgnbd_hier_means/sims_bgnbd_hier_means_1234…
## $ draw_id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ sim_tnx_count <int> 4, 3, 1, 3, 1, 2, 1, 3, 9, 1, 2, 12, 0, 7, 9, 3, 7, 6, 3…
## $ sim_tnx_last <dttm> 2011-08-31 12:49:57, 2011-11-09 15:56:17, 2011-05-19 05…
We also want to look at how our simulated data compares to the transaction data that occurred after Apr 1 2011.
tnx_data_tbl <- btyd_obs_stats_tbl %>%
semi_join(bgnbd_hier_means_validsims_tbl, by = "customer_id")
obs_customer_count <- tnx_data_tbl %>% nrow()
obs_total_tnx_count <- tnx_data_tbl %>% pull(tnx_count) %>% sum()
bgnbd_hier_means_tnx_simsumm_tbl <- bgnbd_hier_means_validsims_tbl %>%
group_by(draw_id) %>%
summarise(
.groups = "drop",
sim_customer_count = length(sim_tnx_count[sim_tnx_count > 0]),
sim_total_tnx_count = sum(sim_tnx_count)
)
ggplot(bgnbd_hier_means_tnx_simsumm_tbl) +
geom_histogram(aes(x = sim_customer_count), binwidth = 10) +
geom_vline(aes(xintercept = obs_customer_count), colour = "red") +
labs(
x = "Simulated Customers With Transactions",
y = "Frequency",
title = "Histogram of Count of Customers Transacted",
subtitle = "Observed Count in Red"
)ggplot(bgnbd_hier_means_tnx_simsumm_tbl) +
geom_histogram(aes(x = sim_total_tnx_count), binwidth = 50) +
geom_vline(aes(xintercept = obs_total_tnx_count), colour = "red") +
labs(
x = "Simulated Transaction Count",
y = "Frequency",
title = "Histogram of Count of Total Transaction Count",
subtitle = "Observed Count in Red"
)4 R Environment
options(width = 120L)
sessioninfo::session_info()## ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
## setting value
## version R version 4.2.0 (2022-04-22)
## os Ubuntu 20.04.5 LTS
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz Etc/UTC
## date 2022-10-31
## pandoc 2.17.1.1 @ /usr/local/bin/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## abind 1.4-5 2016-07-21 [1] RSPM (R 4.2.0)
## arrayhelpers 1.1-0 2020-02-04 [1] RSPM (R 4.2.0)
## assertthat 0.2.1 2019-03-21 [1] RSPM (R 4.2.0)
## backports 1.4.1 2021-12-13 [1] RSPM (R 4.2.0)
## base64enc 0.1-3 2015-07-28 [1] RSPM (R 4.2.0)
## bayesplot * 1.9.0 2022-03-10 [1] RSPM (R 4.2.0)
## bit 4.0.4 2020-08-04 [1] RSPM (R 4.2.0)
## bit64 4.0.5 2020-08-30 [1] RSPM (R 4.2.0)
## bookdown 0.27 2022-06-14 [1] RSPM (R 4.2.0)
## boot 1.3-28 2021-05-03 [2] CRAN (R 4.2.0)
## bridgesampling 1.1-2 2021-04-16 [1] RSPM (R 4.2.0)
## brms * 2.18.0 2022-10-28 [1] Github (paul-buerkner/brms@28f778d)
## Brobdingnag 1.2-7 2022-02-03 [1] RSPM (R 4.2.0)
## broom 0.8.0 2022-04-13 [1] RSPM (R 4.2.0)
## bslib 0.3.1 2021-10-06 [1] RSPM (R 4.2.0)
## cachem 1.0.6 2021-08-19 [1] RSPM (R 4.2.0)
## callr 3.7.0 2021-04-20 [1] RSPM (R 4.2.0)
## cellranger 1.1.0 2016-07-27 [1] RSPM (R 4.2.0)
## checkmate 2.1.0 2022-04-21 [1] RSPM (R 4.2.0)
## cli 3.3.0 2022-04-25 [1] RSPM (R 4.2.0)
## cmdstanr * 0.5.3 2022-10-28 [1] Github (stan-dev/cmdstanr@22b391e)
## coda 0.19-4 2020-09-30 [1] RSPM (R 4.2.0)
## codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.0)
## colorspace 2.0-3 2022-02-21 [1] RSPM (R 4.2.0)
## colourpicker 1.1.1 2021-10-04 [1] RSPM (R 4.2.0)
## conflicted * 1.1.0 2021-11-26 [1] RSPM (R 4.2.0)
## cowplot * 1.1.1 2020-12-30 [1] RSPM (R 4.2.0)
## crayon 1.5.1 2022-03-26 [1] RSPM (R 4.2.0)
## crosstalk 1.2.0 2021-11-04 [1] RSPM (R 4.2.0)
## curl 4.3.2 2021-06-23 [1] RSPM (R 4.2.0)
## data.table 1.14.2 2021-09-27 [1] RSPM (R 4.2.0)
## DBI 1.1.3 2022-06-18 [1] RSPM (R 4.2.0)
## dbplyr 2.2.0 2022-06-05 [1] RSPM (R 4.2.0)
## digest 0.6.29 2021-12-01 [1] RSPM (R 4.2.0)
## directlabels * 2021.1.13 2021-01-16 [1] RSPM (R 4.2.0)
## distributional 0.3.0 2022-01-05 [1] RSPM (R 4.2.0)
## dplyr * 1.0.9 2022-04-28 [1] RSPM (R 4.2.0)
## DT 0.23 2022-05-10 [1] RSPM (R 4.2.0)
## dygraphs 1.1.1.6 2018-07-11 [1] RSPM (R 4.2.0)
## ellipsis 0.3.2 2021-04-29 [1] RSPM (R 4.2.0)
## evaluate 0.15 2022-02-18 [1] RSPM (R 4.2.0)
## fansi 1.0.3 2022-03-24 [1] RSPM (R 4.2.0)
## farver 2.1.0 2021-02-28 [1] RSPM (R 4.2.0)
## fastmap 1.1.0 2021-01-25 [1] RSPM (R 4.2.0)
## forcats * 0.5.1 2021-01-27 [1] RSPM (R 4.2.0)
## fs * 1.5.2 2021-12-08 [1] RSPM (R 4.2.0)
## furrr * 0.3.0 2022-05-04 [1] RSPM (R 4.2.0)
## future * 1.26.1 2022-05-27 [1] RSPM (R 4.2.0)
## gamm4 0.2-6 2020-04-03 [1] RSPM (R 4.2.0)
## generics 0.1.2 2022-01-31 [1] RSPM (R 4.2.0)
## ggdist 3.1.1 2022-02-27 [1] RSPM (R 4.2.0)
## ggplot2 * 3.3.6 2022-05-03 [1] RSPM (R 4.2.0)
## ggridges 0.5.3 2021-01-08 [1] RSPM (R 4.2.0)
## globals 0.15.0 2022-05-09 [1] RSPM (R 4.2.0)
## glue * 1.6.2 2022-02-24 [1] RSPM (R 4.2.0)
## gridExtra 2.3 2017-09-09 [1] RSPM (R 4.2.0)
## gtable 0.3.0 2019-03-25 [1] RSPM (R 4.2.0)
## gtools 3.9.2.2 2022-06-13 [1] RSPM (R 4.2.0)
## haven 2.5.0 2022-04-15 [1] RSPM (R 4.2.0)
## highr 0.9 2021-04-16 [1] RSPM (R 4.2.0)
## hms 1.1.1 2021-09-26 [1] RSPM (R 4.2.0)
## htmltools 0.5.2 2021-08-25 [1] RSPM (R 4.2.0)
## htmlwidgets 1.5.4 2021-09-08 [1] RSPM (R 4.2.0)
## httpuv 1.6.5 2022-01-05 [1] RSPM (R 4.2.0)
## httr 1.4.3 2022-05-04 [1] RSPM (R 4.2.0)
## igraph 1.3.2 2022-06-13 [1] RSPM (R 4.2.0)
## inline 0.3.19 2021-05-31 [1] RSPM (R 4.2.0)
## jquerylib 0.1.4 2021-04-26 [1] RSPM (R 4.2.0)
## jsonlite 1.8.0 2022-02-22 [1] RSPM (R 4.2.0)
## knitr 1.39 2022-04-26 [1] RSPM (R 4.2.0)
## labeling 0.4.2 2020-10-20 [1] RSPM (R 4.2.0)
## later 1.3.0 2021-08-18 [1] RSPM (R 4.2.0)
## lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.0)
## lifecycle 1.0.1 2021-09-24 [1] RSPM (R 4.2.0)
## listenv 0.8.0 2019-12-05 [1] RSPM (R 4.2.0)
## lme4 1.1-29 2022-04-07 [1] RSPM (R 4.2.0)
## loo 2.5.1 2022-03-24 [1] RSPM (R 4.2.0)
## lubridate * 1.8.0 2021-10-07 [1] RSPM (R 4.2.0)
## magrittr * 2.0.3 2022-03-30 [1] RSPM (R 4.2.0)
## markdown 1.1 2019-08-07 [1] RSPM (R 4.2.0)
## MASS 7.3-56 2022-03-23 [2] CRAN (R 4.2.0)
## Matrix 1.4-1 2022-03-23 [2] CRAN (R 4.2.0)
## matrixStats 0.62.0 2022-04-19 [1] RSPM (R 4.2.0)
## memoise 2.0.1 2021-11-26 [1] RSPM (R 4.2.0)
## mgcv 1.8-40 2022-03-29 [2] CRAN (R 4.2.0)
## mime 0.12 2021-09-28 [1] RSPM (R 4.2.0)
## miniUI 0.1.1.1 2018-05-18 [1] RSPM (R 4.2.0)
## minqa 1.2.4 2014-10-09 [1] RSPM (R 4.2.0)
## modelr 0.1.8 2020-05-19 [1] RSPM (R 4.2.0)
## munsell 0.5.0 2018-06-12 [1] RSPM (R 4.2.0)
## mvtnorm 1.1-3 2021-10-08 [1] RSPM (R 4.2.0)
## nlme 3.1-157 2022-03-25 [2] CRAN (R 4.2.0)
## nloptr 2.0.3 2022-05-26 [1] RSPM (R 4.2.0)
## parallelly 1.32.0 2022-06-07 [1] RSPM (R 4.2.0)
## PerformanceAnalytics * 2.0.4 2020-02-06 [1] RSPM (R 4.2.0)
## pillar 1.7.0 2022-02-01 [1] RSPM (R 4.2.0)
## pkgbuild 1.3.1 2021-12-20 [1] RSPM (R 4.2.0)
## pkgconfig 2.0.3 2019-09-22 [1] RSPM (R 4.2.0)
## plyr 1.8.7 2022-03-24 [1] RSPM (R 4.2.0)
## posterior * 1.2.2 2022-06-09 [1] RSPM (R 4.2.0)
## prettyunits 1.1.1 2020-01-24 [1] RSPM (R 4.2.0)
## processx 3.6.1 2022-06-17 [1] RSPM (R 4.2.0)
## projpred 2.1.2 2022-05-13 [1] RSPM (R 4.2.0)
## promises 1.2.0.1 2021-02-11 [1] RSPM (R 4.2.0)
## ps 1.7.1 2022-06-18 [1] RSPM (R 4.2.0)
## purrr * 0.3.4 2020-04-17 [1] RSPM (R 4.2.0)
## quadprog 1.5-8 2019-11-20 [1] RSPM (R 4.2.0)
## Quandl 2.11.0 2021-08-11 [1] RSPM (R 4.2.0)
## quantmod * 0.4.20 2022-04-29 [1] RSPM (R 4.2.0)
## R6 2.5.1 2021-08-19 [1] RSPM (R 4.2.0)
## Rcpp * 1.0.8.3 2022-03-17 [1] RSPM (R 4.2.0)
## RcppParallel 5.1.5 2022-01-05 [1] RSPM (R 4.2.0)
## readr * 2.1.2 2022-01-30 [1] RSPM (R 4.2.0)
## readxl 1.4.0 2022-03-28 [1] RSPM (R 4.2.0)
## reprex 2.0.1 2021-08-05 [1] RSPM (R 4.2.0)
## reshape2 1.4.4 2020-04-09 [1] RSPM (R 4.2.0)
## rlang * 1.0.2 2022-03-04 [1] RSPM (R 4.2.0)
## rmarkdown 2.14 2022-04-25 [1] RSPM (R 4.2.0)
## rmdformats 1.0.4 2022-05-17 [1] RSPM (R 4.2.0)
## rstan 2.26.13 2022-10-28 [1] local
## rstantools 2.2.0 2022-04-08 [1] RSPM (R 4.2.0)
## rstudioapi 0.13 2020-11-12 [1] RSPM (R 4.2.0)
## rvest 1.0.2 2021-10-16 [1] RSPM (R 4.2.0)
## sass 0.4.1 2022-03-23 [1] RSPM (R 4.2.0)
## scales * 1.2.0 2022-04-13 [1] RSPM (R 4.2.0)
## sessioninfo 1.2.2 2021-12-06 [1] RSPM (R 4.2.0)
## shiny 1.7.1 2021-10-02 [1] RSPM (R 4.2.0)
## shinyjs 2.1.0 2021-12-23 [1] RSPM (R 4.2.0)
## shinystan 2.6.0 2022-03-03 [1] RSPM (R 4.2.0)
## shinythemes 1.2.0 2021-01-25 [1] RSPM (R 4.2.0)
## StanHeaders 2.26.13 2022-10-28 [1] local
## stringi 1.7.6 2021-11-29 [1] RSPM (R 4.2.0)
## stringr * 1.4.0 2019-02-10 [1] RSPM (R 4.2.0)
## svUnit 1.0.6 2021-04-19 [1] RSPM (R 4.2.0)
## tensorA 0.36.2 2020-11-19 [1] RSPM (R 4.2.0)
## threejs 0.3.3 2020-01-21 [1] RSPM (R 4.2.0)
## tibble * 3.1.7 2022-05-03 [1] RSPM (R 4.2.0)
## tidybayes * 3.0.2.9000 2022-10-28 [1] Github (mjskay/tidybayes@1efbdef)
## tidyquant * 1.0.4 2022-05-20 [1] RSPM (R 4.2.0)
## tidyr * 1.2.0 2022-02-01 [1] RSPM (R 4.2.0)
## tidyselect 1.1.2 2022-02-21 [1] RSPM (R 4.2.0)
## tidyverse * 1.3.1 2021-04-15 [1] RSPM (R 4.2.0)
## TTR * 0.24.3 2021-12-12 [1] RSPM (R 4.2.0)
## tzdb 0.3.0 2022-03-28 [1] RSPM (R 4.2.0)
## utf8 1.2.2 2021-07-24 [1] RSPM (R 4.2.0)
## V8 4.2.0 2022-05-14 [1] RSPM (R 4.2.0)
## vctrs 0.4.1 2022-04-13 [1] RSPM (R 4.2.0)
## vroom 1.5.7 2021-11-30 [1] RSPM (R 4.2.0)
## withr 2.5.0 2022-03-03 [1] RSPM (R 4.2.0)
## xfun 0.31 2022-05-10 [1] RSPM (R 4.2.0)
## xml2 1.3.3 2021-11-30 [1] RSPM (R 4.2.0)
## xtable 1.8-4 2019-04-21 [1] RSPM (R 4.2.0)
## xts * 0.12.1 2020-09-09 [1] RSPM (R 4.2.0)
## yaml 2.3.5 2022-02-21 [1] RSPM (R 4.2.0)
## zoo * 1.8-10 2022-04-15 [1] RSPM (R 4.2.0)
##
## [1] /usr/local/lib/R/site-library
## [2] /usr/local/lib/R/library
##
## ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
options(width = 80L)